#8/12/2015
#replication materials for Spirling "Democratization and Linguistic Complexity", JOP

rm(list=ls())

#Figure 8: look at cabinet members in  models with and without fixed effects
#--> if estimates overlap, have evidence that individual latent types are not 
#very important

############################
### Figure 8 ###############
############################

#NB: requires plm package
require(plm)

#load the data
#setwd("C:/Users/as9934/Dropbox/complexity/August2015/JOP_replication_data/")
load("bigframe.rdata")

#only look at cabinet members
big.frame<- big.frame[big.frame$cabinet==1,]
#get rid of NAs
big.frame <- big.frame[complete.cases(big.frame),] 

#make into panel data
#-> requires aggregates to means/medians per year
# regress y ~ parl time dummies
# regress y ~ parl time dummies + mp fixed effects
# IF parl time dummies are very different in second spec then 
# mp fixed effects "matter"

#first, make into yearly data
require(plyr)
yeardata  <- ddply(big.frame, .(year.dummy, mp_code), summarize, y = mean(FK_read.ease))

mod.year <- lm(y~ year.dummy , data=yeardata) #don't fit to x

#now allow fixed effects
require(plm)
#don't have to fit cabinet regressor
mod.fe <- plm(y ~  year.dummy  , data = yeardata, index = c("mp_code","year.dummy"), 
              model = "within")

#store reg output
summ.year <- summary(mod.year)
summ.year_coef <-summary(mod.year)$coef
summ.fe <- summary(mod.fe)
summ.fe_coef <- summary(mod.fe)$coef


#function to take estimate row and produce lower and upper bound (95%)
make.95<-function(x=summ.year_coef[2,]){
  low <- x[1] - (1.96*x[2]) 
  hi <- x[1] + (1.96*x[2])
  out <- as.vector(c(low,hi))
  out
}


#now we want to see if the year dummies overlap (95% CIs)
compare.frame <- data.frame(nofe.est= NA, nofe.low=NA, nofe.hi=NA, fe.est=NA , fe.low =NA , fe.hi=NA)

for(i in 1:length(row.names(summ.fe_coef))){
  
  name.no.fe <- row.names(summ.fe_coef)[i]
  compare.frame[i,1] <- summ.year_coef[name.no.fe,1] #just grab the pt estimates for the constant
  compare.frame[i,c(2,3)] <- make.95(summ.year_coef[name.no.fe,])
  
  
  compare.frame[i,4] <- summ.fe_coef[name.no.fe,1] #grab pt estimate for each one
  compare.frame[i,c(5,6)] <- make.95(summ.fe_coef[name.no.fe,])
  
  
  
}

#label rows
row.names(compare.frame) <- row.names(summ.fe_coef)


par(bg='cornsilk1')
plot(1:nrow(compare.frame), seq(min(compare.frame), max(compare.frame), length=nrow(compare.frame)), 
     type="n",xlab="session", ylab="estimate", axes=F )

axis(1, at= 1:nrow(compare.frame),labels=gsub("year.dummy","",row.names(compare.frame)))
axis(2)
box()

legend("bottomright",pch=c(16,16), lty=c(1,3), col=c("black", "red"), legend=c("w/o fixed effs","fixed effs"))

for(j in 1:nrow(compare.frame)){
  
  points(j, compare.frame[j,1], pch=16,col="black")  
  arrows(j, compare.frame[j,2], j , compare.frame[j,3], code=3, length=0)  
  
  
  points(j+.3, compare.frame[j+.3,4],pch=16, col="red")
  arrows(j+.3, compare.frame[j+.3,5], j+.3 , compare.frame[j+.3,6], code=3, length=0, col="red", lty=3, lwd=1.5)  
  
  
}
#bunch of warnings: but not an issue (come from weird call to str from 'formula' which is used in plm)